import gradio as gr
from transformers import pipeline

# Shared speech-recognition pipeline, constructed once when the module loads
# so every call to the handler reuses it. No model is named here, so the
# checkpoint is whatever the library selects for this task.
p = pipeline(task="automatic-speech-recognition")


def transcribe(audio):
    """Transcribe an audio clip to text with the module-level pipeline.

    Args:
        audio: The audio input forwarded to the pipeline ``p``; the UI in
            this file is configured to supply a file path. ``None`` means
            nothing was recorded.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when ``audio`` is ``None``.
    """
    # Submitting without a recording yields None; return an empty transcript
    # instead of letting the pipeline fail on a missing input.
    if audio is None:
        return ""
    return p(audio)["text"]


# Input widget: microphone capture, passed to the handler as a file path.
mic_input = gr.Audio(source="microphone", type="filepath")

# Wire the microphone input through `transcribe` into a plain text output.
demo = gr.Interface(fn=transcribe, inputs=mic_input, outputs="text")

# Bind to every network interface on port 7771; share=True also asks Gradio
# for a share link.
demo.launch(
    server_name="0.0.0.0",
    server_port=7771,
    share=True,
)
